"""Abstract base collator."""

import re

import collate.strings

class Collator(object):
    """Abstract base class for Collators.

    The base implementation sorts by the plain unicode form of each
    string; subclasses are expected to override key() (and anything
    else they need) to implement locale-specific rules.

    Attributes:
    locale - the collator follows rules for this locale
    encoding - assumed string encoding
    """

    # Class-level defaults, used when the constructor is not given a value.
    locale = "C"
    encoding = "ascii"

    def __init__(self, locale=None, encoding=None):
        """Create a collator.

        Arguments left as None keep the class-level default for the
        corresponding attribute.
        """
        if locale is not None:
            self.locale = locale
        if encoding is not None:
            self.encoding = encoding

    def cmp(self, string1, string2):
        """Compare two strings by their sort keys.

        Return negative if string1 < string2, zero if they are equal,
        positive if string1 > string2.
        """
        return cmp(self.key(string1), self.key(string2))

    def key(self, string):
        """Return a good sorting key for the string.

        The sort key should be considered an opaque value which is
        only meaningful when compared to other sort keys from the same
        collator. In this base class it is the unicode form of the
        string.
        """
        return self.unicode(string)

    def words(self, string, sep=re.compile(r"(\s+)", re.UNICODE)):
        """Split the string into separate words.

        The default separator has a capturing group, so the returned
        list alternates words and the whitespace runs between them,
        and may start or end with an empty string when the input
        starts or ends with whitespace.
        """
        return re.split(sep, self.unicode(string))

    def sortemekey(self, string):
        """Return a key based on sortemes of a string.

        A sorteme, by analogy with grapheme/morpheme/etc. is an atom
        of sort information. This is larger than a word boundary but
        smaller than a sentence boundary; roughly, a sorteme boundary
        occurs between letters and numbers, between numbers and
        numbers if 'too much' punctuation exists in between, between
        lines.

        Returns a 2-tuple: the result of collate.strings.sortemes,
        followed by the plain sort key of the whole string.
        """
        string = self.unicode(string)
        # Shove the sortkeyed original string on the end to resolve
        # ties intelligently.
        return (collate.strings.sortemes(string, self.key),
                self.key(string))

    def unicode(self, string):
        """Convert a str to a unicode using the collator encoding.

        The interpreter's default conversion is tried first; only if
        that raises UnicodeError is the string decoded with
        self.encoding, replacing undecodable bytes.
        """
        try:
            return unicode(string)
        except UnicodeError:
            return string.decode(self.encoding, 'replace')

    def str(self, string):
        """Convert a unicode to a str using the collator encoding.

        The interpreter's default conversion is tried first; only if
        that raises UnicodeError is the string encoded with
        self.encoding, replacing unencodable characters.
        """
        try:
            return str(string)
        except UnicodeError:
            return string.encode(self.encoding, 'replace')

    def lstripwords(
        self, string, strip=collate.strings.INITIAL_STOPS, append=u", "):
        """Strip words and whitespace from the start of a string.

        Leading tokens are removed for as long as they are blank or
        their lowercased form is found in strip.

        If append is not empty, it and the words stripped from the
        front are appended to the end. If append is empty the stripped
        words are dropped.
        """
        tokens = self.words(self.unicode(string))

        # Find the first token that is a real, non-stop word. Empty
        # tokens count as blank: re.split yields one at the start (or
        # end) of the list when the string starts (or ends) with a
        # separator, and it must not halt the scan.
        split_at = 0
        while split_at < len(tokens):
            token = tokens[split_at]
            blank = not token or token.isspace()
            if not blank and token.lower() not in strip:
                break
            split_at += 1

        stripped = tokens[:split_at]
        remainder = tokens[split_at:]

        # Keep only the stop words and the whitespace between them.
        while stripped and (not stripped[-1] or stripped[-1].isspace()):
            stripped.pop()
        while stripped and (not stripped[0] or stripped[0].isspace()):
            del stripped[0]

        if append and stripped:
            # Only add the joiner when something precedes it.
            if remainder:
                remainder.append(append)
            remainder.extend(stripped)
        return u"".join(remainder)

    def lstripsortemekey(
        self, string, strip=collate.strings.INITIAL_STOPS, append=u", "):
        """Return a key based on sortemes of a prefix-stripped string.

        Returns a 2-tuple: the sortemekey of the string after
        lstripwords(string, strip, append), followed by the plain sort
        key of the original string.
        """
        string = self.unicode(string)
        stripped = self.lstripwords(string, strip, append)
        return (self.sortemekey(stripped), self.key(string))
